/*******************************************************************************

[Last updated: June 4th, 2024]

This script contains the customized Stata functions (programs) required for the
data cleaning and analysis of the Administrative portion.

*******************************************************************************/




* Utility functions ------------------------------------------------------------	
	
	* z_cal: z-score calculation
	*
	* Usage:   z_cal varname
	* Creates: z_<varname> = (<varname> - mean) / sd, where mean and sd are the
	*          statistics -ttest, by(treatment)- reports for its first group
	*          (the control group when treatment is coded 0/1).
	*
	* Note: the statistics are computed on whatever sample is in memory, so the
	*       result is sensitive to sample restrictions applied before the call.
	capture program drop z_cal
	program define z_cal
		args column

		* First-group mean and SD come back in r(mu_1) and r(sd_1)
		quietly ttest `column', by(treatment)
		local ctrl_mu = r(mu_1)
		local ctrl_sd = r(sd_1)

		* Standardize every observation against the first-group statistics
		generate z_`column' = (`column' - `ctrl_mu') / `ctrl_sd'
	end

	
	
	
	
* Programs to run regression and save result ----------------------------------- 	

	* ALL_REG: run the regression(s) for one outcome in the requested mode and
	* store the results as (global) Stata matrices.
	*
	* Usage: ALL_REG mode mat_prefix outcome
	*   mode       - ITT_single | ITT_single_het | LASSO_single |
	*                LASSO_single_het | ITT_multi | LASSO_multi | LASSO_multi_het
	*                (any other value runs no regression; only the control-mean
	*                matrices below are produced)
	*   mat_prefix - prefix used in the names of the result matrices
	*   outcome    - dependent variable
	*
	* Matrices written:
	*   CM_<outcome>_<group>, CM_<outcome>_<group>_t
	*       1x1 control mean (treatment == 0 & <group> == 1), one pair per
	*       <group> in $month_list
	*   p_<mat_prefix>_<outcome>_<v>          (ITT_single, LASSO_single)
	*       (CM, coef, se, p, N) for each <v> in $reporting_target; needs a
	*       matrix CM_<outcome>_<v> to exist
	*   p_<mat_prefix>_<outcome>_<group>_t    (ITT_multi, LASSO_multi)
	*       (CM, coef, se, p, N) for each <group> in $restriction; needs a
	*       matrix CM_<outcome>_<group> to exist
	*   het_<mat_prefix>_<group>              (LASSO_multi_het)
	*       (coef, se, p, N) for treatment, then for treatment_character
	*   het_<mat_prefix>                      (ITT_single_het, LASSO_single_het)
	*       3 x 8: the first six $reporting_target entries, two per row, each
	*       as (coef, se, p, N)
	*   Scratch matrices temp, temp_1..temp_k, P1 and P2 are also overwritten.
	capture program drop ALL_REG
	program define ALL_REG
	args mode mat_prefix outcome
	
		* Capture control mean -------------------

			foreach group in $month_list {
			qui: sum `outcome' if (treatment == 0) & (`group' == 1)
			
			* Same value under two names: the plain name is used by the
			* multiple-regression modes, the "_t" name makes the lookup by
			* reporting-target name easier in the single-regression modes
			mat CM_`outcome'_`group'   = r(mean)
			mat CM_`outcome'_`group'_t = r(mean)
			}
			
		* Run regression -------------------------
		* One branch per mode
		
			* ITT - single regression: 
			* Outside parameters: $covariates
			if inlist("`mode'", "ITT_single", "ITT_single_het") {
				di "reg `outcome' $covariates i.strata i.year_month, vce(cluster employee_id)"
				qui: reg `outcome' ///
						 $covariates i.strata i.year_month, ///
						 vce(cluster employee_id) 
			}
			
			* LASSO single regression
			* Outside parameters: $month_treatment_list, $consideration, $month_list
			if "`mode'" == "LASSO_single" {
				qui: dsregress `outcome' $month_treatment_list, ///
					 controls((i.strata i.year_month $month_list) $consideration) ///
					 vce(cluster employee_id)
				
				* Store covariate choice
				*qui: estimates store eL_`mat_prefix'_`outcome'
				
			}
			
			* LASSO: single regression with heterogeneous treatment effects
			* Outside parameters: $month_treatment_list, $month_treat_character,
			*                     $month_list, $month_character, $consideration
			if "`mode'" == "LASSO_single_het" {
				qui: dsregress `outcome' $month_treatment_list $month_treat_character, ///
					 controls((i.strata i.year_month $month_list $month_character) $consideration) ///
					 vce(cluster employee_id)
				
				* Actually, if we remove "character", the findings are the same
				
				* Store covariate choice
				*qui: estimates store eL_`mat_prefix'_`outcome'
				
			}
			
			* ITT - multiple regression (one regression per subsample): 
			* Outside parameters: $covariates, $restriction
			if "`mode'" == "ITT_multi" {
				
				foreach group in $restriction {
				di "reg `outcome' treatment $covariates i.strata i.year_month if (`group' == 1), vce(cluster employee_id)"
				qui: reg `outcome' treatment ///
						  $covariates i.strata i.year_month if (`group' == 1), ///
					      vce(cluster employee_id)
						  
				qui: test treatment
				mat p_`mat_prefix'_`outcome'_`group'_t = CM_`outcome'_`group', _b[treatment], _se[treatment], r(p), e(N)
				mat li p_`mat_prefix'_`outcome'_`group'_t
				}
			}
			
			* LASSO - multiple regression (one regression per subsample): 
			* Outside parameters: $consideration, $restriction
			if "`mode'" == "LASSO_multi" {
				
				foreach group in $restriction {
				qui: dsregress `outcome' treatment ///
					 if (`group' == 1), ///
					 controls((i.strata i.year_month) $consideration) ///
					 vce(cluster employee_id)
				
				qui: test treatment
				mat p_`mat_prefix'_`outcome'_`group'_t = CM_`outcome'_`group', _b[treatment], _se[treatment], r(p), e(N) 
				mat li p_`mat_prefix'_`outcome'_`group'_t
				}
			}
			
			* LASSO - multiple regression - heterogeneous: 
			* Outside parameters: $consideration, $restriction
			if "`mode'" == "LASSO_multi_het" {
				
				foreach group in $restriction {
				qui: dsregress `outcome' treatment treatment_character ///
					 if (`group' == 1), ///
					 controls((i.strata i.year_month character) $consideration) ///
					 vce(cluster employee_id)
				
				qui: test treatment
				mat P1 =  _b[treatment], _se[treatment], r(p), e(N) 
				qui: test treatment_character
				mat P2 =  _b[treatment_character], _se[treatment_character], r(p), e(N)
				mat het_`mat_prefix'_`group' = P1, P2
				}
			}
		

		* Save results of the single-regression modes
		
			* For coefficients from the same regression: one 5-column matrix
			* (CM, coef, se, p, N) per reporting target
			* Target can be month_treatment_list or list of hetero
			if inlist("`mode'", "ITT_single", "LASSO_single") {
				* Resets the global matrix "temp"; its only consumer is the
				* commented-out accumulation inside the loop
				mat temp = ., ., ., .
				
					foreach v in $reporting_target {
					qui: test `v'
					mat p_`mat_prefix'_`outcome'_`v' = CM_`outcome'_`v', _b[`v'], _se[`v'], r(p), e(N)
					mat li p_`mat_prefix'_`outcome'_`v'
					*mat temp = temp \ p_`outcome'_`v'
					}
			}
			
			if inlist("`mode'", "ITT_single_het", "LASSO_single_het") {
				
					local c_het = 0
					foreach v in $reporting_target {
					qui: test `v'
					local c_het = `c_het' + 1
					mat temp_`c_het' = _b[`v'], _se[`v'], r(p), e(N)
					}
					
					* The layout below is fixed at six targets (3 rows x 2).
					* temp_# are global matrices, so with fewer than six
					* targets the missing ones could be leftovers from an
					* earlier call; stop instead of reporting stale numbers.
					if `c_het' < 6 {
						di as error "ALL_REG: mode `mode' needs at least 6 variables in \$reporting_target (found `c_het')"
						exit 198
					}
					if `c_het' > 6 {
						di as text "note: ALL_REG mode `mode' reports only the first 6 of `c_het' reporting targets"
					}
					
					mat het_`mat_prefix' = temp_1, temp_2 \ temp_3, temp_4 \ temp_5, temp_6
					mat li het_`mat_prefix'
			}
			
						
		end	
	
	
	
	
* Programs to calculate Q_values -----------------------------------------------	

	* PQ: stack the result matrices of one outcome family, compute sharpened
	* q-values with the (modified) Anderson do-file, and save the results back
	* as matrices.
	*
	* Usage: PQ [fam_name]
	*   fam_name - optional label for the family; the stacked diagnostic
	*              matrix is saved as Q_<fam_name>. When omitted the matrix is
	*              named "Q_", which is what the program always produced before
	*              the argument was declared.
	*
	* Globals read:
	*   $family    - names of the 1x5 matrices (CM, Coef, SE, pval, N) to stack
	*   $Filepath2 - root folder containing function/function_0__modified_fdr_sharpened_qvalues.do
	*
	* Matrices written:
	*   temp_p       - scratch: placeholder row followed by the stacked members
	*   Q_<fam_name> - one row per family member: CM Coef SE pval qval N
	*   q_<member>   - the same six columns as a single row, one per member
	*
	* The data in memory are preserved and restored around the computation.
	capture program drop PQ
	program define PQ
	args fam_name
	
		* Nothing to adjust without a family; fail before touching the data
		if `"$family"' == "" {
			di as error "PQ: global \$family is empty"
			exit 198
		}
	
		* Create the matrix (row 1 is a placeholder, dropped below)
		mat temp_p = J(1,5,.)
		foreach member in $family {
			mat temp_p = temp_p \ `member'
		}
		
		* Invoke Anderson code
		preserve
			
			* Clear the memory
			clear			
			svmat double temp_p, name(pval)
			rename (pval1 pval2 pval3 pval4 pval5) /// 
				   (CM Coef SE pval N)
			drop if _n == 1
			
			* Calculate Q-values
			qui: do "${Filepath2}/function/function_0__modified_fdr_sharpened_qvalues.do"
				
				/*Note:
				Anderson's original code requires manual input of p-value.
				I modified it such that it can be run without manual input */
			
			* Save back q values
			
				rename bky06 qval
			
				* As large matrix for diagnosis purpose
				
				mkmat CM Coef SE pval qval N, matrix(Q_`fam_name')
				mat rownames Q_`fam_name' = $family
				mat li Q_`fam_name'
				
				* As single matrix for table purpose
				* (row k of the data corresponds to the k-th member of $family)
				local counter = 0
				foreach member in $family {
				local counter = `counter' + 1
				mkmat CM Coef SE pval qval N if (_n == `counter'), matrix(q_`member')
				mat li q_`member'
				}
			
		restore
	
	end

	
	
	
* Functions for balance table --------------------------------------------------

	* bal_row: build one row of the balance table.
	*
	* Usage: bal_row outcome unique_indicator matrixname index
	*   outcome          - variable to summarize
	*   unique_indicator - only observations with this variable == 1 are used
	*   matrixname       - name of the 1x10 matrix to create
	*   index            - accepted but not used inside the program
	*
	* Columns of <matrixname>:
	*   control N, mean, sd | treated N, mean, sd |
	*   treatment coef, se, regression N | p-value of the treatment coef
	*
	* The global matrices C, T, d and p are overwritten as a side effect.
	capture program drop bal_row
	program define bal_row
		args outcome unique_indicator matrixname index

		* Control arm: N, mean, SD
		quietly summarize `outcome' if (`unique_indicator' == 1 & treatment == 0)
		matrix C = r(N), r(mean), r(sd)

		* Treated arm: N, mean, SD
		quietly summarize `outcome' if (`unique_indicator' == 1 & treatment == 1)
		matrix T = r(N), r(mean), r(sd)

		* Treatment-control difference, with strata fixed effects and
		* standard errors clustered by employee
		quietly regress `outcome' treatment i.strata if (`unique_indicator' == 1), vce(cluster employee_id)
		matrix d = _b[treatment], _se[treatment], e(N)

		* p-value of the difference
		quietly test treatment
		matrix p = r(p)

		* Assemble the row
		matrix `matrixname' = C, T, d, p
	end

	
	
	
* Function for unit switch analysis  -------------------------------------------
	
	* reg_unit_switch: regress an outcome on treatment for the unit-switch
	* analysis and store the result as a 1x6 matrix.
	*
	* Usage: reg_unit_switch mode outcome addon matrix_name
	*   mode        - "OLS"   : reg with i.strata and `addon' as controls
	*                 "LASSO" : dsregress with (i.strata `addon') always
	*                           included and $consideration as candidates
	*   outcome     - dependent variable
	*   addon       - extra control terms (pass "" for none)
	*   matrix_name - name of the result matrix
	*
	* Result columns: control mean, coef, se, p-value, . (placeholder), N
	* Standard errors are clustered by employee_id in both modes.
	capture: program drop reg_unit_switch
	program define reg_unit_switch
	args mode outcome addon matrix_name
	
		* Reject unknown modes up front: without this check no model would
		* be estimated and -test treatment- below would run on whatever
		* estimates happen to be in memory
		if !inlist("`mode'", "OLS", "LASSO") {
			di as error `"reg_unit_switch: mode must be "OLS" or "LASSO" (got "`mode'")"'
			exit 198
		}
	
		* Get CM (control mean over the sample in memory)
		qui: sum `outcome' if (treatment ==0)
		local cm = r(mean)
		
		* Regression
		if "`mode'" == "OLS" {
		qui: reg `outcome' treatment i.strata `addon', vce(cluster employee_id)
		}
		
		if "`mode'" == "LASSO" {
		* Not run quietly: the dsregress output is displayed
		dsregress `outcome' treatment, ///
					 controls((i.strata `addon') $consideration) ///
					 vce(cluster employee_id)
		}
		
		* Save result
		qui: test treatment
		mat `matrix_name' = `cm', _b[treatment], _se[treatment], r(p), ., e(N)
		mat li `matrix_name'
	
	end


		
